1 Data Loading and Preparation

# Raw survey extracts: 2020 wave as CSV, 2025 wave as a serialized R object
data_2020 <- read.csv(file = "2020.csv")
data_2025 <- readRDS(file = "crew_dataset_keys_analysis_ready.rds")

# Age brackets in ascending order; used as the factor levels for `age`
age_order <- c(
  "Under 25", "25-29", "30-34", "35-39", "40-44",
  "45-49", "50-54", "55-59", "60-64", "65 or older"
)

# Map free-text gender answers onto the labels used in the analysis.
# The input is trimmed and lower-cased first. Exact "female"/"woman" become
# "Women", exact "male"/"man" become "Men", anything containing "nonbinary",
# "non-binary" or "gender variant" becomes "Nonbinary"; every other value is
# returned in its trimmed, lower-cased form.
standardize_gender <- function(gender_col) {
  cleaned <- trimws(gender_col)
  cleaned <- tolower(cleaned)

  women_terms <- c("female", "woman")
  men_terms <- c("male", "man")
  nonbinary_pattern <- "nonbinary|non-binary|gender variant"

  case_when(
    is.element(cleaned, women_terms) ~ "Women",
    is.element(cleaned, men_terms) ~ "Men",
    grepl(nonbinary_pattern, cleaned) ~ "Nonbinary",
    TRUE ~ as.character(cleaned)
  )
}

# Shared preparation step for both survey waves: derive `gender` (from m27)
# and `age` (from m25), then keep only respondents with usable values for
# both. All original columns are retained (the 2025 plots need them).
prepare_survey_wave <- function(raw_wave) {
  raw_wave %>%
    mutate(
      gender = standardize_gender(m27),
      age = factor(m25, levels = age_order)
    ) %>%
    filter(
      !is.na(gender),
      !is.na(age),
      gender != "",
      age != "Decline to Answer"
    )
}

# Prepare 2020 data
df_2020 <- prepare_survey_wave(data_2020)

# Prepare 2025 data (keep ALL columns for plotting)
df_2025 <- prepare_survey_wave(data_2025)

# Color palette
gender_colors <- c(Women = "#e74c3c", Men = "#3498db", Nonbinary = "#9b59b6")

cat("Data loaded successfully!\n")
## Data loaded successfully!
cat("2020 sample size:", nrow(df_2020), "\n")
## 2020 sample size: 2587
cat("2025 sample size:", nrow(df_2025), "\n")
## 2025 sample size: 2412

2 Women-Only Weighting Setup

# Reference distribution for the weighting: number of 2020 women respondents
# per age group and each group's share of all 2020 women
women_2020_age_counts <- df_2020 %>%
  filter(gender == "Women") %>%
  count(age, name = "count_2020")
women_2020_age_counts$prop_2020 <- prop.table(women_2020_age_counts$count_2020)

# Render the reference distribution as a styled table
women_2020_age_counts %>%
  kable(
    caption = "2020 Women Age Distribution (Reference for Weighting)",
    col.names = c("Age Group", "Count 2020", "Proportion 2020"),
    digits = 3
  ) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE,
    position = "center"
  ) %>%
  column_spec(1, bold = TRUE) %>%
  row_spec(0, bold = TRUE, color = "white", background = "#3498db")
2020 Women Age Distribution (Reference for Weighting)
Age Group Count 2020 Proportion 2020
Under 25 38 0.018
25-29 164 0.077
30-34 271 0.127
35-39 325 0.153
40-44 292 0.137
45-49 263 0.123
50-54 266 0.125
55-59 268 0.126
60-64 172 0.081
65 or older 72 0.034
# Age -> weight lookup: each age group's share of the 2020 women respondents
women_age_weights <- women_2020_age_counts[c("age", "prop_2020")]
names(women_age_weights) <- c("age", "weight")

print("Women Age-Based Weights:")
## [1] "Women Age-Based Weights:"
print(women_age_weights)
##            age     weight
## 1     Under 25 0.01783200
## 2        25-29 0.07695917
## 3        30-34 0.12717034
## 4        35-39 0.15251056
## 5        40-44 0.13702487
## 6        45-49 0.12341624
## 7        50-54 0.12482403
## 8        55-59 0.12576255
## 9        60-64 0.08071328
## 10 65 or older 0.03378695

3 Weighting Functions and Themes

# Enhanced plotting theme for professional analysis.
# Built on theme_minimal(): centred bold title, grey subtitle, legend at the
# bottom, light-grey major grid lines, no minor grid, white backgrounds.
theme_compensation <- theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5, margin = margin(b = 20)),
    plot.subtitle = element_text(size = 12, hjust = 0.5, color = "gray40", margin = margin(b = 15)),
    axis.title = element_text(size = 12, face = "bold"),
    axis.text = element_text(size = 10),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 10),
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated for lines since ggplot2 3.4.0
    panel.grid.major.x = element_line(color = "gray90", linewidth = 0.3),
    panel.grid.major.y = element_line(color = "gray90", linewidth = 0.3),
    strip.text = element_text(size = 11, face = "bold"),
    plot.background = element_rect(fill = "white", color = NA),
    panel.background = element_rect(fill = "white", color = NA)
  )

# Color palette (fill colours keyed by the standardized gender labels)
comp_colors <- c("Women" = "#e74c3c", "Men" = "#3498db", "Nonbinary" = "#9b59b6")

# Format dollar amounts in thousands with a "K" suffix, rounded to whole
# thousands (e.g. 125000 -> "$125K").
format_currency <- function(x) {
  thousands_label <- scales::dollar_format(accuracy = 1, scale = 1e-3, suffix = "K")
  thousands_label(x)
}

# WEIGHTING FUNCTION FOR CATEGORICAL DATA
#
# Summarises the answer distribution of one categorical question for men and
# women. Men's percentages are plain shares of men's answers. Women's
# percentages are age-standardised: the answer shares within each age group
# are averaged using `age_weights`.
#
# Arguments:
#   data           data frame with `gender`, `age` and the question column
#   question_col   name (string) of the question column
#   show_breakdown if TRUE, print the men's and women's tables
#   age_weights    data frame with columns `age` and `weight`; defaults to
#                  the global `women_age_weights`
#
# Returns a data frame with columns `response`, `gender` ("Men"/"Women") and
# `weighted_percent` (0-100). Respondents of any other gender are not
# reported. Missing or empty answers are excluded.
apply_women_weighting <- function(data, question_col, show_breakdown = FALSE,
                                  age_weights = women_age_weights) {

  plot_data <- data %>%
    select(gender, age, response = all_of(question_col)) %>%
    filter(!is.na(response) & response != "")

  # Denominator for `global_percent`: all valid answers, every gender
  total_responses <- nrow(plot_data)

  # Men percentages (NO weighting - weight = 1.0).
  # `percent` and `global_percent` only appear in the optional breakdown.
  men_summary <- plot_data %>%
    filter(gender == "Men") %>%
    count(response) %>%
    mutate(
      percent = n / sum(n) * 100,
      global_percent = n / total_responses * 100,
      gender = "Men",
      weighted_percent = percent  # No weighting for men
    )

  # Women: answer shares within each age group, with the reference weight
  # attached. Age groups without a reference weight cannot be standardised
  # and are dropped.
  women_by_age <- plot_data %>%
    filter(gender == "Women") %>%
    count(age, response) %>%
    group_by(age) %>%
    mutate(percent_in_age = n / sum(n) * 100) %>%
    ungroup() %>%
    left_join(age_weights, by = "age") %>%
    filter(!is.na(weight))

  # Total reference weight of the age groups that actually have women
  # respondents. Dividing by it keeps the women's percentages summing to 100
  # even when an age group is absent (it equals 1 when all are present).
  covered_weight <- women_by_age %>%
    distinct(age, weight) %>%
    pull(weight) %>%
    sum()

  women_weighted <- women_by_age %>%
    group_by(response) %>%
    summarise(
      weighted_percent = sum(percent_in_age * weight) / covered_weight,
      .groups = "drop"
    ) %>%
    mutate(gender = "Women")

  # Combine results
  final_results <- bind_rows(
    men_summary %>% select(response, gender, weighted_percent),
    women_weighted %>% select(response, gender, weighted_percent)
  )

  if (show_breakdown) {
    cat("\n=== WEIGHTING BREAKDOWN ===\n")
    cat("Men (no weighting applied):\n")
    print(men_summary)
    cat("\nWomen (2020 age-weighted):\n")
    print(women_weighted)
  }

  final_results
}

# WEIGHTING FUNCTION FOR NUMERIC DATA (COMPENSATION/SCALES)
#
# Computes mean and median of a numeric column for men (unweighted) and
# women (age-standardised with `age_weights`), overall or per group.
#
# Arguments:
#   data        data frame with `gender`, `age`, the value column and,
#               optionally, the grouping column
#   salary_col  name (string) of the numeric column; character or factor
#               columns are converted with as.numeric()
#   group_col   optional name (string) of a grouping column
#   age_weights data frame with columns `age` and `weight`; defaults to the
#               global `women_age_weights`
#
# Returns a data frame with `mean_salary`, `median_salary` and `gender`,
# preceded by `group` when `group_col` is given. Only values that are
# non-missing, finite and > 0 are used.
#
# Women's figures are weighted averages of the per-age-group statistics,
# normalised by the total weight of the age groups present. Without that
# normalisation a group whose women span only some age brackets would be
# understated. NOTE: `median_salary` for women is therefore a weighted
# average of age-group medians, not a weighted median of individual values.
# If there are no usable women rows in the overall case, the women's row
# holds NaN.
apply_compensation_weighting <- function(data, salary_col, group_col = NULL,
                                         age_weights = women_age_weights) {

  # Convert salary column to numeric if it is stored as text or as a factor
  # (unparseable entries become NA and are filtered out below)
  salary_raw <- data[[salary_col]]
  if (is.character(salary_raw) || is.factor(salary_raw)) {
    data[[salary_col]] <- as.numeric(as.character(salary_raw))
  }

  by_group <- !is.null(group_col)

  # Prepare data
  if (by_group) {
    plot_data <- data %>%
      select(gender, age, salary = all_of(salary_col), group = all_of(group_col)) %>%
      filter(!is.na(group) & group != "")
  } else {
    plot_data <- data %>%
      select(gender, age, salary = all_of(salary_col))
  }
  plot_data <- plot_data %>%
    filter(!is.na(salary) & salary > 0 & !is.infinite(salary))

  # Group by the given columns, plus `group` when a grouping was requested
  regroup <- function(df, ...) {
    if (by_group) group_by(df, ..., group) else group_by(df, ...)
  }

  # Mean and median salary per current group
  summarise_salary <- function(df) {
    summarise(
      df,
      mean_salary = mean(salary, na.rm = TRUE),
      median_salary = median(salary, na.rm = TRUE),
      .groups = "drop"
    )
  }

  # Men: plain statistics
  men_stats <- plot_data %>%
    filter(gender == "Men") %>%
    regroup() %>%
    summarise_salary() %>%
    mutate(gender = "Men")

  # Women: statistics per age group (and group), with reference weights.
  # Age groups without a reference weight are dropped.
  women_by_age <- plot_data %>%
    filter(gender == "Women") %>%
    regroup(age) %>%
    summarise_salary() %>%
    left_join(age_weights, by = "age") %>%
    filter(!is.na(weight))

  # Weighted average over age groups, normalised by the weight present
  women_weighted <- women_by_age %>%
    regroup() %>%
    summarise(
      mean_salary = sum(mean_salary * weight) / sum(weight),
      median_salary = sum(median_salary * weight) / sum(weight),
      .groups = "drop"
    ) %>%
    mutate(gender = "Women")

  bind_rows(men_stats, women_weighted)
}

# WEIGHTING FUNCTION FOR TRUE/FALSE DATA
# Recodes a logical question column to "Yes"/"No" (missing answers are
# dropped) and delegates to apply_women_weighting().
apply_boolean_weighting <- function(data, question_col, show_breakdown = FALSE) {

  answered <- select(data, gender, age, response = all_of(question_col))
  answered <- filter(answered, !is.na(response))
  answered$response <- ifelse(answered$response, "Yes", "No")

  # Use the categorical weighting function
  apply_women_weighting(answered, "response", show_breakdown)
}

# Confirm on the console that the three weighting helpers are defined
cat(
  "Weighting functions loaded successfully!\n",
  "✓ apply_women_weighting() - for categorical data with global percentages\n",
  "✓ apply_compensation_weighting() - for numeric compensation/scale data\n",
  "✓ apply_boolean_weighting() - for TRUE/FALSE data\n",
  sep = ""
)
## Weighting functions loaded successfully!
## ✓ apply_women_weighting() - for categorical data with global percentages
## ✓ apply_compensation_weighting() - for numeric compensation/scale data
## ✓ apply_boolean_weighting() - for TRUE/FALSE data

4 Group 1: Compensation Analysis (Plots 1-13)

4.1 Plot 1: Annual Base Salary by Gender

cat("Creating Plot 1: Annual Base Salary by Gender\n")
## Creating Plot 1: Annual Base Salary by Gender
# Base-salary summary: men unweighted, women age-weighted
salary_data <- apply_compensation_weighting(df_2025, salary_col = "m21A[1_SQ001]")

# Gender pay gap: percentage by which the women's mean falls short of the men's
women_salary <- salary_data$mean_salary[salary_data$gender == "Women"]
men_salary <- salary_data$mean_salary[salary_data$gender == "Men"]
salary_gap <- round(100 * (1 - women_salary / men_salary), 1)

# One bar per gender, labelled with the mean in $K
p1 <- ggplot(
  data = salary_data,
  mapping = aes(x = gender, y = mean_salary, fill = gender)
) +
  geom_col(alpha = 0.8, width = 0.6) +
  geom_text(
    mapping = aes(label = format_currency(mean_salary)),
    fontface = "bold", size = 5, vjust = -0.5
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.15)), labels = format_currency) +
  scale_fill_manual(values = comp_colors) +
  guides(fill = "none") +
  labs(
    x = "Gender",
    y = "Mean Annual Base Salary",
    title = "2025 Annual Base Salary by Gender",
    subtitle = paste0("Women's salaries weighted by 2020 age distribution | Base salary gap: ", salary_gap, "%"),
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  theme_compensation

print(p1)

4.2 Plot 2: Commission Earnings by Gender

cat("Creating Plot 2: Commission Earnings by Gender\n")
## Creating Plot 2: Commission Earnings by Gender
# Commission summary: men unweighted, women age-weighted
commission_data <- apply_compensation_weighting(df_2025, salary_col = "m21A[3_SQ001]")

# One bar per gender, labelled with the mean in $K
p2 <- ggplot(
  data = commission_data,
  mapping = aes(x = gender, y = mean_salary, fill = gender)
) +
  geom_col(alpha = 0.8, width = 0.6) +
  geom_text(
    mapping = aes(label = format_currency(mean_salary)),
    fontface = "bold", size = 5, vjust = -0.5
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.15)), labels = format_currency) +
  scale_fill_manual(values = comp_colors) +
  guides(fill = "none") +
  labs(
    x = "Gender",
    y = "Mean Annual Commission",
    title = "2025 Commission Earnings by Gender",
    subtitle = "Women's earnings weighted by 2020 age distribution | Performance-based compensation",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  theme_compensation

print(p2)

4.3 Plot 3: Bonus Earnings by Gender

cat("Creating Plot 3: Bonus Earnings by Gender\n")
## Creating Plot 3: Bonus Earnings by Gender
# Bonus summary: men unweighted, women age-weighted
bonus_data <- apply_compensation_weighting(df_2025, salary_col = "m21A[2_SQ001]")

# One bar per gender, labelled with the mean in $K
p3 <- ggplot(
  data = bonus_data,
  mapping = aes(x = gender, y = mean_salary, fill = gender)
) +
  geom_col(alpha = 0.8, width = 0.6) +
  geom_text(
    mapping = aes(label = format_currency(mean_salary)),
    fontface = "bold", size = 5, vjust = -0.5
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.15)), labels = format_currency) +
  scale_fill_manual(values = comp_colors) +
  guides(fill = "none") +
  labs(
    x = "Gender",
    y = "Mean Annual Bonus",
    title = "2025 Short-Term Bonus Earnings by Gender",
    subtitle = "Women's bonuses weighted by 2020 age distribution | Incentive compensation analysis",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  theme_compensation

print(p3)

4.4 Plot 4: Total Annual Compensation

cat("Creating Plot 4: Total Annual Compensation\n")
## Creating Plot 4: Total Annual Compensation
# Total-compensation summary: men unweighted, women age-weighted
total_comp_data <- apply_compensation_weighting(df_2025, salary_col = "m28B")

# Gender pay gap: percentage by which the women's mean falls short of the men's
women_total <- total_comp_data$mean_salary[total_comp_data$gender == "Women"]
men_total <- total_comp_data$mean_salary[total_comp_data$gender == "Men"]
pay_gap <- round(100 * (1 - women_total / men_total), 1)

# One bar per gender, labelled with the mean in $K
p4 <- ggplot(
  data = total_comp_data,
  mapping = aes(x = gender, y = mean_salary, fill = gender)
) +
  geom_col(alpha = 0.8, width = 0.6) +
  geom_text(
    mapping = aes(label = format_currency(mean_salary)),
    fontface = "bold", size = 5, vjust = -0.5
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.15)), labels = format_currency) +
  scale_fill_manual(values = comp_colors) +
  guides(fill = "none") +
  labs(
    x = "Gender",
    y = "Mean Total Compensation",
    title = "2025 Total Annual Compensation by Gender",
    subtitle = paste0("Women's compensation weighted by 2020 age distribution | Gender pay gap: ", pay_gap, "%"),
    caption = "Source: 2025 Survey Data | Includes base salary, bonus, and commission"
  ) +
  theme_compensation

print(p4)

4.5 Plot 5: Compensation by Specialization

cat("Creating Plot 5: Compensation by Specialization\n")
## Creating Plot 5: Compensation by Specialization
# Total compensation per specialization (m1B) and gender
spec_comp_data <- apply_compensation_weighting(df_2025, salary_col = "m28B", group_col = "m1B")

# The eight specializations with the highest average of the per-gender means
top_specs <- spec_comp_data %>%
  group_by(group) %>%
  summarise(avg_salary = mean(mean_salary), .groups = "drop") %>%
  top_n(n = 8, wt = avg_salary) %>%
  pull(group)

# Keep those specializations and wrap long names for the axis
spec_comp_filtered <- spec_comp_data %>%
  filter(group %in% top_specs) %>%
  mutate(group = str_wrap(group, width = 20))

# Horizontal dodged bars, specializations ordered by mean compensation
p5 <- ggplot(
  data = spec_comp_filtered,
  mapping = aes(x = reorder(group, mean_salary), y = mean_salary, fill = gender)
) +
  geom_col(alpha = 0.8, width = 0.7, position = "dodge") +
  geom_text(
    mapping = aes(label = format_currency(mean_salary)),
    position = position_dodge(width = 0.7),
    size = 3.5, hjust = -0.1
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.15)), labels = format_currency) +
  scale_fill_manual(values = comp_colors) +
  coord_flip() +
  labs(
    x = "Specialization",
    y = "Mean Total Compensation",
    fill = "Gender",
    title = "2025 Total Compensation by Specialization and Gender",
    subtitle = "Women's compensation weighted by 2020 age distribution | Top 8 specializations shown",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  theme_compensation +
  theme(axis.text.y = element_text(size = 9))

print(p5)

4.6 Plot 6: Compensation by Position Level

cat("Creating Plot 6: Compensation by Position Level\n")
## Creating Plot 6: Compensation by Position Level
# Total compensation per current position (m3A) and gender
position_comp_data <- apply_compensation_weighting(df_2025, "m28B", "m3A")

# Clean position names and filter out non-employed categories.
# NOTE(review): the recode runs on already-aggregated means, so it assumes
# each pattern matches a single raw m3A answer; if two raw answers map to the
# same label they will appear as separate bars - confirm against the data.
position_comp_filtered <- position_comp_data %>%
  filter(!group %in% c("Unemployed", "Retired")) %>%
  mutate(
    group = case_when(
      str_detect(group, "C-Suite") ~ "C-Suite",
      str_detect(group, "SVP|Vice President|Managing Director|Partner") ~ "VP/SVP/MD/Partner",
      str_detect(group, "Senior level") ~ "Senior Level",
      str_detect(group, "Mid-level|Associate") ~ "Mid-Level/Associate", 
      str_detect(group, "Entry level") ~ "Entry Level",
      str_detect(group, "Self-employed|Independent") ~ "Self-Employed",
      TRUE ~ group
    )
  ) %>%
  filter(!is.na(group))

# Order positions hierarchically
position_order <- c("Entry Level", "Mid-Level/Associate", "Senior Level", 
                   "VP/SVP/MD/Partner", "C-Suite", "Self-Employed")

# Labels that did not match any pattern are appended after the known
# hierarchy. Using position_order alone as the factor levels would turn them
# into NA and draw them as an "NA" bar.
position_levels <- c(
  position_order,
  setdiff(unique(position_comp_filtered$group), position_order)
)

position_comp_filtered <- position_comp_filtered %>%
  mutate(group = factor(group, levels = position_levels))

p6 <- ggplot(position_comp_filtered, aes(x = group, y = mean_salary, fill = gender)) +
  geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
  geom_text(aes(label = format_currency(mean_salary)), 
            position = position_dodge(width = 0.7), vjust = -0.3, size = 3.5) +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(labels = format_currency, expand = expansion(mult = c(0, 0.15))) +
  labs(
    title = "2025 Total Compensation by Position Level and Gender",
    subtitle = "Women's compensation weighted by 2020 age distribution | Clear hierarchy shown",
    x = "Position Level",
    y = "Mean Total Compensation", 
    fill = "Gender",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  theme_compensation +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 9))

print(p6)

4.7 Plot 7: Compensation by Age Groups

cat("Creating Plot 7: Compensation by Age Groups\n")
## Creating Plot 7: Compensation by Age Groups
# Age weighting is a no-op inside a single age group, so the per-age means
# are computed directly for both genders instead of going through the
# age-weighting helper (weighting by the same variable that defines the
# groups would scale each women's mean by that group's weight).
# Same value filter as the other compensation plots: non-missing, finite, > 0.
age_comp_data <- df_2025 %>%
  filter(gender %in% c("Men", "Women")) %>%
  transmute(
    gender,
    group = m25,
    salary = if (is.numeric(m28B)) m28B else as.numeric(as.character(m28B))
  ) %>%
  filter(!is.na(salary) & salary > 0 & !is.infinite(salary) &
           !is.na(group) & group != "") %>%
  group_by(group, gender) %>%
  summarise(
    mean_salary = mean(salary),
    median_salary = median(salary),
    .groups = "drop"
  ) %>%
  select(group, mean_salary, median_salary, gender)

# Keep the known age brackets and order them from youngest to oldest
age_comp_filtered <- age_comp_data %>%
  filter(group %in% age_order) %>%
  mutate(group = factor(group, levels = age_order))

p7 <- ggplot(age_comp_filtered, aes(x = group, y = mean_salary, fill = gender)) +
  geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
  geom_text(aes(label = format_currency(mean_salary)), 
            position = position_dodge(width = 0.7), vjust = -0.3, size = 3.5) +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(labels = format_currency, expand = expansion(mult = c(0, 0.15))) +
  labs(
    title = "2025 Total Compensation by Age Group and Gender",
    subtitle = "Unweighted means within each age group | Career progression visible",
    x = "Age Group",
    y = "Mean Total Compensation",
    fill = "Gender",
    caption = "Source: 2025 Survey Data | No age weighting needed within age groups"
  ) +
  theme_compensation +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

print(p7)

4.8 Plot 8: Compensation by Years Experience

cat("Creating Plot 8: Compensation by Years Experience\n")
## Creating Plot 8: Compensation by Years Experience
# Note: Based on inspection, m4A only has "Less than one year" and "Other"
# Tell the reader about the limited categories before plotting
cat("Note: Years Experience data is limited to 'Less than one year' and 'Other' categories\n")
## Note: Years Experience data is limited to 'Less than one year' and 'Other' categories
# Total compensation per experience category (m4A) and gender
exp_comp_data <- apply_compensation_weighting(df_2025, salary_col = "m28B", group_col = "m4A")

# Dodged bars per experience category, labelled with the mean in $K
p8 <- ggplot(
  data = exp_comp_data,
  mapping = aes(x = group, y = mean_salary, fill = gender)
) +
  geom_col(alpha = 0.8, width = 0.7, position = "dodge") +
  geom_text(
    mapping = aes(label = format_currency(mean_salary)),
    position = position_dodge(width = 0.7),
    size = 4, vjust = -0.3
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.15)), labels = format_currency) +
  scale_fill_manual(values = comp_colors) +
  labs(
    x = "Years of Experience",
    y = "Mean Total Compensation",
    fill = "Gender",
    title = "2025 Total Compensation by Years Experience and Gender",
    subtitle = "Women's compensation weighted by 2020 age distribution | Limited experience categories available",
    caption = "Source: 2025 Survey Data | Note: Limited experience data available"
  ) +
  theme_compensation

print(p8)

4.9 Plot 9: Compensation by Company Size

cat("Creating Plot 9: Compensation by Company Size\n")
## Creating Plot 9: Compensation by Company Size
# Total compensation per company revenue band (m29) and gender
company_comp_data <- apply_compensation_weighting(df_2025, "m28B", "m29")

# Clean company size categories and order logically
company_comp_filtered <- company_comp_data %>%
  filter(!group %in% c("Don't know") & !is.na(group)) %>%
  mutate(
    group = case_when(
      group == "Less than $100,000" ~ "<$100K",
      group == "$100,000-$249,999" ~ "$100K-$250K",
      group == "$250,000-$499,999" ~ "$250K-$500K",
      group == "$500,000 - $999,999" ~ "$500K-$1M",
      group == "$1 Million - $4.9 Million" ~ "$1M-$5M",
      group == "$5 Million - $9.9 Million" ~ "$5M-$10M",
      group == "$10 Million - $19.9 Million" ~ "$10M-$20M",
      group == "$20 Million - $49.9 Million" ~ "$20M-$50M",
      group == "$50 Million - $99.9 Million" ~ "$50M-$100M",
      group == "$100 Million - $299 Million" ~ "$100M-$300M",
      group == "$300 Million - $499 Million" ~ "$300M-$500M",
      group == "$500 Million - $999 Million" ~ "$500M-$1B",
      group == "More than $1 Billion" ~ ">$1B",
      TRUE ~ group
    )
  )

# Order by company size
size_order <- c("<$100K", "$100K-$250K", "$250K-$500K", "$500K-$1M", "$1M-$5M", 
                "$5M-$10M", "$10M-$20M", "$20M-$50M", "$50M-$100M", "$100M-$300M",
                "$300M-$500M", "$500M-$1B", ">$1B")

# Any band whose raw text did not match one of the labels above keeps its
# original wording and is appended after the known bands. Using size_order
# alone as the factor levels would turn it into NA and draw an "NA" bar.
size_levels <- c(
  size_order,
  setdiff(unique(company_comp_filtered$group), size_order)
)

company_comp_filtered <- company_comp_filtered %>%
  mutate(group = factor(group, levels = size_levels))

p9 <- ggplot(company_comp_filtered, aes(x = group, y = mean_salary, fill = gender)) +
  geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
  geom_text(aes(label = format_currency(mean_salary)), 
            position = position_dodge(width = 0.7), vjust = -0.3, size = 3, angle = 90) +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(labels = format_currency, expand = expansion(mult = c(0, 0.15))) +
  labs(
    title = "2025 Total Compensation by Company Size and Gender",
    subtitle = "Women's compensation weighted by 2020 age distribution | Annual revenue categories",
    x = "Company Annual Revenue",
    y = "Mean Total Compensation",
    fill = "Gender",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  theme_compensation +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8))

print(p9)

4.10 Plot 10: Compensation Changes 2023-2024

cat("Creating Plot 10: Compensation Changes 2023-2024\n")
## Creating Plot 10: Compensation Changes 2023-2024
# Answer shares for question m22A: men unweighted, women age-weighted
comp_change_data <- apply_women_weighting(df_2025, question_col = "m22A")

# Replace the raw answer texts with display labels
comp_change_filtered <- comp_change_data %>%
  filter(!is.na(response)) %>%
  mutate(response = case_when(
    response == "Increase:" ~ "Increased",
    response == "Decrease:" ~ "Decreased",
    response == "Stayed the same" ~ "Stayed the Same",
    TRUE ~ response
  ))

# Dodged bars per answer, labelled with the percentage
p10 <- ggplot(
  data = comp_change_filtered,
  mapping = aes(x = response, y = weighted_percent, fill = gender)
) +
  geom_col(alpha = 0.8, width = 0.7, position = "dodge") +
  geom_text(
    mapping = aes(label = paste0(round(weighted_percent, 1), "%")),
    position = position_dodge(width = 0.7),
    fontface = "bold", size = 4, vjust = -0.5
  ) +
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.15)),
    labels = function(pct) paste0(pct, "%")
  ) +
  scale_fill_manual(values = comp_colors) +
  labs(
    x = "Compensation Change Direction",
    y = "Percentage of Respondents",
    fill = "Gender",
    title = "2025 Compensation Changes from 2023-2024 by Gender",
    subtitle = "Women's responses weighted by 2020 age distribution | Global percentages shown",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  theme_compensation

print(p10)

4.11 Plot 11: Projected 2025 Compensation Changes

cat("Creating Plot 11: Projected 2025 Compensation Changes\n")
## Creating Plot 11: Projected 2025 Compensation Changes
# Answer shares for question m22C: men unweighted, women age-weighted
proj_change_data <- apply_women_weighting(df_2025, question_col = "m22C")

# Replace the raw answer texts with display labels
proj_change_filtered <- proj_change_data %>%
  filter(!is.na(response)) %>%
  mutate(response = case_when(
    response == "Increase:" ~ "Expect Increase",
    response == "Decrease:" ~ "Expect Decrease",
    response == "Stay the same" ~ "Expect No Change",
    TRUE ~ response
  ))

# Dodged bars per answer, labelled with the percentage
p11 <- ggplot(
  data = proj_change_filtered,
  mapping = aes(x = response, y = weighted_percent, fill = gender)
) +
  geom_col(alpha = 0.8, width = 0.7, position = "dodge") +
  geom_text(
    mapping = aes(label = paste0(round(weighted_percent, 1), "%")),
    position = position_dodge(width = 0.7),
    fontface = "bold", size = 4, vjust = -0.5
  ) +
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.15)),
    labels = function(pct) paste0(pct, "%")
  ) +
  scale_fill_manual(values = comp_colors) +
  labs(
    x = "Expected Compensation Change",
    y = "Percentage of Respondents",
    fill = "Gender",
    title = "2025 Projected Compensation Changes by Gender",
    subtitle = "Women's responses weighted by 2020 age distribution | Forward-looking expectations",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  theme_compensation

print(p11)

4.12 Plot 12: Commission Willingness

cat("Creating Plot 12: Commission Willingness\n")
## Creating Plot 12: Commission Willingness
# Answer shares for question m23B: men unweighted, women age-weighted.
# Note: this reuses (overwrites) the `commission_data` name from Plot 2.
commission_data <- apply_women_weighting(df_2025, question_col = "m23B")

commission_filtered <- filter(commission_data, !is.na(response))

# Dodged bars per answer, labelled with the percentage
p12 <- ggplot(
  data = commission_filtered,
  mapping = aes(x = response, y = weighted_percent, fill = gender)
) +
  geom_col(alpha = 0.8, width = 0.7, position = "dodge") +
  geom_text(
    mapping = aes(label = paste0(round(weighted_percent, 1), "%")),
    position = position_dodge(width = 0.7),
    fontface = "bold", size = 4, vjust = -0.5
  ) +
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.15)),
    labels = function(pct) paste0(pct, "%")
  ) +
  scale_fill_manual(values = comp_colors) +
  labs(
    x = "Commission Willingness",
    y = "Percentage of Respondents",
    fill = "Gender",
    title = "2025 Willingness to Accept Commission-Based Positions by Gender",
    subtitle = "Women's responses weighted by 2020 age distribution | Career path preferences",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  theme_compensation

print(p12)

4.13 Plot 13: Compensation Structure Role in Career

cat("Creating Plot 13: Compensation Structure Role in Career\n")
## Creating Plot 13: Compensation Structure Role in Career
# Answer shares for question m24: men unweighted, women age-weighted
comp_structure_data <- apply_women_weighting(df_2025, question_col = "m24")

# Shorten the long answer texts to two-line labels; anything unmatched is
# wrapped at 20 characters
comp_structure_filtered <- comp_structure_data %>%
  filter(!is.na(response)) %>%
  mutate(response = case_when(
    str_detect(response, "not part of the compensation structure") ~ "Commissions Not\nPart of Career",
    str_detect(response, "actively pursued") ~ "Actively Pursued\nCommission Career",
    str_detect(response, "does not play a significant role") ~ "Commission Role\nNot Significant",
    str_detect(response, "altered.*to avoid") ~ "Altered Career to\nAvoid Commission",
    response == "None of these" ~ "None of These",
    TRUE ~ str_wrap(response, 20)
  ))

# Horizontal dodged bars, answers ordered by percentage
p13 <- ggplot(
  data = comp_structure_filtered,
  mapping = aes(x = reorder(response, weighted_percent), y = weighted_percent, fill = gender)
) +
  geom_col(alpha = 0.8, width = 0.7, position = "dodge") +
  geom_text(
    mapping = aes(label = paste0(round(weighted_percent, 1), "%")),
    position = position_dodge(width = 0.7),
    size = 3.5, hjust = -0.1
  ) +
  scale_y_continuous(
    expand = expansion(mult = c(0, 0.15)),
    labels = function(pct) paste0(pct, "%")
  ) +
  scale_fill_manual(values = comp_colors) +
  coord_flip() +
  labs(
    x = "Career Path Relationship to Commission",
    y = "Percentage of Respondents",
    fill = "Gender",
    title = "2025 Role of Compensation Structure in Career Path by Gender",
    subtitle = "Women's responses weighted by 2020 age distribution | Commission-related career decisions",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  theme_compensation +
  theme(axis.text.y = element_text(size = 9))

print(p13)

5 Group 2: Career Trajectory & Satisfaction (Plots 14-22)

5.1 Plot 14: Career Aspirations by Gender

cat("Creating Plot 14: Career Aspirations by Gender\n")
## Creating Plot 14: Career Aspirations by Gender
# Answer shares for question m4C: men unweighted, women age-weighted
career_asp_data <- apply_women_weighting(df_2025, "m4C")

# Clean up career aspiration categories
career_asp_filtered <- career_asp_data %>%
  filter(!is.na(response)) %>%
  mutate(
    response = case_when(
      str_detect(response, "C-Suite") ~ "C-Suite",
      str_detect(response, "SVP|Vice President|Managing Director|Partner") ~ "VP/SVP/MD/Partner",
      str_detect(response, "Senior level") ~ "Senior Level",
      str_detect(response, "Mid-level|Associate") ~ "Mid-Level/Associate", 
      str_detect(response, "Entry level") ~ "Entry Level",
      str_detect(response, "Self-employed|Independent") ~ "Self-Employed",
      str_detect(response, "Not motivated") ~ "Not Title-Motivated",
      TRUE ~ response
    )
  ) %>%
  # Raw answers that collapse onto the same label are pooled, so each gender
  # has exactly one bar per level (percentages are additive)
  group_by(response, gender) %>%
  summarise(weighted_percent = sum(weighted_percent), .groups = "drop")

# Order aspirations hierarchically
aspiration_order <- c("Entry Level", "Mid-Level/Associate", "Senior Level", 
                     "VP/SVP/MD/Partner", "C-Suite", "Self-Employed", "Not Title-Motivated")

# Answers that matched no pattern keep their own label and are appended
# after the known hierarchy. Using aspiration_order alone as the factor
# levels would turn them into NA and draw an "NA" bar.
aspiration_levels <- c(
  aspiration_order,
  setdiff(unique(career_asp_filtered$response), aspiration_order)
)

career_asp_filtered <- career_asp_filtered %>%
  mutate(response = factor(response, levels = aspiration_levels))

p14 <- ggplot(career_asp_filtered, aes(x = response, y = weighted_percent, fill = gender)) +
  geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
  geom_text(aes(label = paste0(round(weighted_percent, 1), "%")), 
            position = position_dodge(width = 0.7), vjust = -0.3, size = 4, fontface = "bold") +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(labels = function(x) paste0(x, "%"), expand = expansion(mult = c(0, 0.15))) +
  labs(
    title = "2025 Career Aspirations by Gender",
    subtitle = "Women's responses weighted by 2020 age distribution | Peak career level goals",
    x = "Aspired Career Level",
    y = "Percentage of Respondents",
    fill = "Gender",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  theme_compensation +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 9))

print(p14)

5.2 Plot 15: Career Aspirations by Age Group

cat("Creating Plot 15: Career Aspirations by Age Group\n")
## Creating Plot 15: Career Aspirations by Age Group
# Create age-career aspiration cross-tabulation with women's weighting.
# Men are shown per age group; women are shown as one age-standardised
# overall column.
career_age_data <- df_2025 %>%
  select(gender, age, career_asp = m4C, age_group = m25) %>%
  filter(!is.na(career_asp) & !is.na(age_group) & !is.na(gender) & 
         age_group != "Decline to Answer" & career_asp != "") %>%
  mutate(
    career_asp_clean = case_when(
      str_detect(career_asp, "C-Suite") ~ "C-Suite",
      str_detect(career_asp, "SVP|Vice President|Managing Director|Partner") ~ "VP/SVP/MD/Partner",
      str_detect(career_asp, "Senior level") ~ "Senior Level",
      str_detect(career_asp, "Mid-level|Associate") ~ "Mid-Level/Associate", 
      str_detect(career_asp, "Not motivated") ~ "Not Title-Motivated",
      TRUE ~ "Other"
    ),
    age_group = factor(age_group, levels = age_order)
  )

# Men: share of each aspiration within every age group (no weighting)
men_career_age <- career_age_data %>% 
  filter(gender == "Men") %>%
  count(age_group, career_asp_clean) %>%
  group_by(age_group) %>%
  mutate(percent = n / sum(n) * 100) %>%
  ungroup() %>%
  mutate(gender = "Men", weighted_percent = percent)

# Women: share of each aspiration within every age group, with the 2020
# reference weight attached. Age groups without a weight are dropped.
women_career_cells <- career_age_data %>% 
  filter(gender == "Women") %>%
  count(age, career_asp_clean) %>%
  group_by(age) %>%
  mutate(percent_in_age = n / sum(n) * 100) %>%
  ungroup() %>%
  left_join(women_age_weights, by = "age") %>%
  filter(!is.na(weight))

# Total reference weight of the age groups that actually have women
# respondents. Dividing by it keeps the women's shares summing to 100 even
# when an age group is absent (it equals 1 when all are present).
women_weight_total <- sum(distinct(women_career_cells, age, weight)$weight)

women_career_age <- women_career_cells %>%
  group_by(career_asp_clean) %>%
  summarise(
    weighted_percent = sum(percent_in_age * weight) / women_weight_total,
    .groups = "drop"
  ) %>%
  mutate(gender = "Women", age_group = "Overall Women (Weighted)")

# Combine for plotting - focus on key aspirations and age groups
career_age_combined <- bind_rows(
  men_career_age %>% select(age_group, career_asp_clean, gender, weighted_percent),
  women_career_age %>% select(age_group, career_asp_clean, gender, weighted_percent)
)

# Percentages were computed over all aspirations, so the stacks of the four
# levels kept here do not necessarily reach 100%
career_age_filtered <- career_age_combined %>%
  filter(career_asp_clean %in% c("C-Suite", "VP/SVP/MD/Partner", "Senior Level", "Mid-Level/Associate")) %>%
  filter(age_group %in% c("25-29", "30-34", "35-39", "40-44", "45-49", "50-54", "Overall Women (Weighted)"))

p15 <- ggplot(career_age_filtered, aes(x = age_group, y = weighted_percent, fill = career_asp_clean)) +
  geom_col(position = "stack", alpha = 0.8) +
  facet_wrap(~gender, scales = "free_x") +
  scale_fill_viridis_d(name = "Career Aspiration", option = "plasma") +
  scale_y_continuous(labels = function(x) paste0(x, "%"), expand = expansion(mult = c(0, 0.05))) +
  labs(
    title = "2025 Career Aspirations by Age Group and Gender",
    subtitle = "Women's responses weighted by 2020 age distribution | Stacked view of aspirations",
    x = "Age Group",
    y = "Percentage of Respondents",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  theme_compensation +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
    strip.text = element_text(size = 12, face = "bold")
  )

print(p15)

5.3 Plot 16: Current Position by Gender

cat("Creating Plot 16: Current Position by Gender\n")
## Creating Plot 16: Current Position by Gender
# Weighted distribution of current position (m3A) by gender.
current_pos_data <- apply_women_weighting(df_2025, "m3A")

# Order positions hierarchically
position_order <- c("Entry Level", "Mid-Level/Associate", "Senior Level", 
                   "VP/SVP/MD/Partner", "C-Suite", "Self-Employed")

# Clean up current position categories and filter out non-employed.
# NOTE(review): percentages are not re-based after dropping "Unemployed" /
# "Retired", so the bars need not sum to 100% - confirm that is intended.
current_pos_filtered <- current_pos_data %>%
  filter(!response %in% c("Unemployed", "Retired") & !is.na(response)) %>%
  mutate(
    # First matching rule wins, so rule order matters.
    response = case_when(
      str_detect(response, "C-Suite") ~ "C-Suite",
      str_detect(response, "SVP|Vice President|Managing Director|Partner") ~ "VP/SVP/MD/Partner",
      str_detect(response, "Senior level") ~ "Senior Level",
      str_detect(response, "Mid-level|Associate") ~ "Mid-Level/Associate", 
      str_detect(response, "Entry level") ~ "Entry Level",
      str_detect(response, "Self-employed|Independent") ~ "Self-Employed",
      TRUE ~ response
    ),
    # Anything that is not one of the plotted levels would otherwise turn
    # into an NA factor level (and an "NA" bar); give it an explicit label.
    response = if_else(response %in% position_order, response, "Other")
  ) %>%
  # Several raw titles collapse into the same level. Sum them so there is
  # exactly one row - and therefore one bar and one label - per gender and
  # level; otherwise position = "dodge" overplots the duplicates.
  group_by(gender, response) %>%
  summarise(
    weighted_percent = sum(weighted_percent, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(response = factor(response, levels = c(position_order, "Other")))

p16 <- ggplot(current_pos_filtered, aes(x = response, y = weighted_percent, fill = gender)) +
  geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
  geom_text(aes(label = paste0(round(weighted_percent, 1), "%")), 
            position = position_dodge(width = 0.7), vjust = -0.3, size = 4, fontface = "bold") +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(labels = function(x) paste0(x, "%"), expand = expansion(mult = c(0, 0.15))) +
  labs(
    title = "2025 Current Position Distribution by Gender",
    subtitle = "Women's responses weighted by 2020 age distribution | Career level representation",
    x = "Current Position Level",
    y = "Percentage of Respondents",
    fill = "Gender",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  theme_compensation +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 9))

print(p16)

5.4 Plot 17: Success Level Self-Assessment

cat("Creating Plot 17: Success Level Self-Assessment\n")
## Creating Plot 17: Success Level Self-Assessment
# Weighted mean of the self-assessed success item (m15[SQ001]) by gender.
# The helper's output column is called mean_salary; here it carries the mean
# rating, which the labels below describe as a 1-10 scale.
success_data <- apply_compensation_weighting(df_2025, "m15[SQ001]")

p17 <- success_data %>%
  ggplot(aes(gender, mean_salary, fill = gender)) +
  labs(
    title = "2025 Career Success Self-Assessment by Gender",
    subtitle = "Women's responses weighted by 2020 age distribution | Scale: 1 (Low) - 10 (High)",
    x = "Gender",
    y = "Mean Success Level (1-10 Scale)",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  geom_col(width = 0.6, alpha = 0.8) +
  # Value label printed just above each bar.
  geom_text(
    aes(label = round(mean_salary, 1)),
    vjust = -0.5,
    size = 5,
    fontface = "bold"
  ) +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(
    limits = c(0, 10),
    expand = expansion(mult = c(0, 0.15))
  ) +
  theme_compensation +
  # Fill duplicates the x axis, so the legend is suppressed.
  guides(fill = "none")

print(p17)

5.5 Plot 18: Satisfaction with Career Success

cat("Creating Plot 18: Satisfaction with Career Success\n")
## Creating Plot 18: Satisfaction with Career Success
# Weighted mean of the career-success satisfaction item (m16[SQ001]) by
# gender. The helper's output column is called mean_salary; here it carries
# the mean rating, which the labels below describe as a 1-10 scale.
career_sat_data <- apply_compensation_weighting(df_2025, "m16[SQ001]")

p18 <- career_sat_data %>%
  ggplot(aes(gender, mean_salary, fill = gender)) +
  labs(
    title = "2025 Satisfaction with Career Success by Gender",
    subtitle = "Women's responses weighted by 2020 age distribution | Scale: 1 (Not Satisfied) - 10 (Very Satisfied)",
    x = "Gender",
    y = "Mean Satisfaction Level (1-10 Scale)",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  geom_col(width = 0.6, alpha = 0.8) +
  # Value label printed just above each bar.
  geom_text(
    aes(label = round(mean_salary, 1)),
    vjust = -0.5,
    size = 5,
    fontface = "bold"
  ) +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(
    limits = c(0, 10),
    expand = expansion(mult = c(0, 0.15))
  ) +
  theme_compensation +
  # Fill duplicates the x axis, so the legend is suppressed.
  guides(fill = "none")

print(p18)

5.6 Plot 19: Work/Life Balance Satisfaction

cat("Creating Plot 19: Work/Life Balance Satisfaction\n")
## Creating Plot 19: Work/Life Balance Satisfaction
# Weighted mean of the work/life balance satisfaction item (m16[SQ002]) by
# gender. The helper's output column is called mean_salary; here it carries
# the mean rating, which the labels below describe as a 1-10 scale.
balance_data <- apply_compensation_weighting(df_2025, "m16[SQ002]")

p19 <- balance_data %>%
  ggplot(aes(gender, mean_salary, fill = gender)) +
  labs(
    title = "2025 Work/Life Balance Satisfaction by Gender",
    subtitle = "Women's responses weighted by 2020 age distribution | Scale: 1 (Not Satisfied) - 10 (Very Satisfied)",
    x = "Gender",
    y = "Mean Work/Life Balance Satisfaction (1-10 Scale)",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  geom_col(width = 0.6, alpha = 0.8) +
  # Value label printed just above each bar.
  geom_text(
    aes(label = round(mean_salary, 1)),
    vjust = -0.5,
    size = 5,
    fontface = "bold"
  ) +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(
    limits = c(0, 10),
    expand = expansion(mult = c(0, 0.15))
  ) +
  theme_compensation +
  # Fill duplicates the x axis, so the legend is suppressed.
  guides(fill = "none")

print(p19)

5.7 Plot 20: Career Transitions

cat("Creating Plot 20: Career Transitions\n")
## Creating Plot 20: Career Transitions
# Weighted distribution of the career-transition item (m4D) by gender.
career_trans_data <- apply_women_weighting(df_2025, "m4D")

# Order transitions logically
transition_order <- c("No Transitions", "1 Transition", "2 Transitions", "3 Transitions",
                     "4-5 Transitions", "6-9 Transitions", "10+ Transitions")

# Clean up transition categories
career_trans_filtered <- career_trans_data %>%
  filter(!is.na(response)) %>%
  mutate(
    response = case_when(
      response == "0" ~ "No Transitions",
      response == "1" ~ "1 Transition",
      response == "2" ~ "2 Transitions",
      response == "3" ~ "3 Transitions",
      response %in% c("4", "5") ~ "4-5 Transitions",
      response %in% c("6", "7", "8", "9") ~ "6-9 Transitions",
      response == "10 or more" ~ "10+ Transitions",
      TRUE ~ response
    ),
    # Anything that is not one of the plotted buckets would otherwise turn
    # into an NA factor level (and an "NA" bar); give it an explicit label.
    response = if_else(response %in% transition_order, response, "Other")
  ) %>%
  # "4"/"5" and "6".."9" are merged into shared buckets above. Add their
  # percentages together so each gender has a single row per bucket;
  # otherwise position = "dodge" overplots the duplicate bars and labels
  # instead of showing the bucket total.
  group_by(gender, response) %>%
  summarise(
    weighted_percent = sum(weighted_percent, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(response = factor(response, levels = c(transition_order, "Other")))

p20 <- ggplot(career_trans_filtered, aes(x = response, y = weighted_percent, fill = gender)) +
  geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
  geom_text(aes(label = paste0(round(weighted_percent, 1), "%")), 
            position = position_dodge(width = 0.7), vjust = -0.3, size = 3.5, fontface = "bold") +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(labels = function(x) paste0(x, "%"), expand = expansion(mult = c(0, 0.15))) +
  labs(
    title = "2025 Career Transitions (Company Changes) by Gender",
    subtitle = "Women's responses weighted by 2020 age distribution | Job mobility patterns",
    x = "Number of Company Transitions",
    y = "Percentage of Respondents",
    fill = "Gender",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  theme_compensation +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 9))

print(p20)

5.8 Plot 21: Promotions & Lateral Moves

cat("Creating Plot 21: Promotions & Lateral Moves\n")
## Creating Plot 21: Promotions & Lateral Moves
# Process promotion and lateral move data: reshape the four m12A sub-items to
# long form (one row per respondent and move type) with a numeric count.
promo_data <- df_2025 %>%
  select(gender, age, 
         accepted_promos = `m12A[SQ001]`,
         declined_promos = `m12A[SQ002]`,
         accepted_lateral = `m12A[SQ003]`,
         declined_lateral = `m12A[SQ004]`) %>%
  filter(!is.na(gender) & !is.na(age)) %>%
  pivot_longer(cols = c(accepted_promos, declined_promos, accepted_lateral, declined_lateral),
               names_to = "move_type", values_to = "count") %>%
  filter(!is.na(count)) %>%
  mutate(
    move_type = case_when(
      move_type == "accepted_promos" ~ "Accepted Promotions",
      move_type == "declined_promos" ~ "Declined Promotions", 
      move_type == "accepted_lateral" ~ "Accepted Lateral Moves",
      move_type == "declined_lateral" ~ "Declined Lateral Moves"
    ),
    # Values that are not parseable as numbers become NA here and are
    # ignored by the na.rm = TRUE means below.
    count = as.numeric(count)
  )

# Apply weighting for each move type
move_results <- map_dfr(unique(promo_data$move_type), function(move) {
  move_subset <- promo_data %>% filter(move_type == move)
  
  # Men: plain (unweighted) mean.
  men_stats <- move_subset %>% 
    filter(gender == "Men") %>%
    summarise(mean_count = mean(count, na.rm = TRUE), gender = "Men")
  
  # Women: mean per age group, keeping only groups that have both a weight
  # and a usable mean (mean() yields NaN when every count in a group is NA).
  women_by_age <- move_subset %>% 
    filter(gender == "Women") %>%
    group_by(age) %>%
    summarise(mean_count = mean(count, na.rm = TRUE), .groups = "drop") %>%
    left_join(women_age_weights, by = "age") %>%
    filter(!is.na(weight), !is.na(mean_count))
  
  # Weighted average of the age-group means. Dividing by the weights that
  # were actually used keeps the result a true average even when some age
  # groups were dropped above; without it, every missing group pulls the
  # women's figure toward zero. When all groups are present and the weights
  # sum to 1 this equals the plain sum(mean_count * weight).
  # NOTE(review): assumes women_age_weights$weight holds non-negative
  # reference shares - confirm against its definition.
  women_weighted <- women_by_age %>%
    summarise(mean_count = sum(mean_count * weight) / sum(weight), gender = "Women")
  
  bind_rows(men_stats, women_weighted) %>%
    mutate(move_type = move)
})

p21 <- ggplot(move_results, aes(x = move_type, y = mean_count, fill = gender)) +
  geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
  geom_text(aes(label = round(mean_count, 1)), 
            position = position_dodge(width = 0.7), vjust = -0.3, size = 3.5, fontface = "bold") +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
  labs(
    title = "2025 Career Moves: Promotions & Lateral Moves by Gender",
    subtitle = "Women's responses weighted by 2020 age distribution | Mean number of moves",
    x = "Type of Career Move",
    y = "Mean Number of Moves",
    fill = "Gender",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  theme_compensation +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 9))

print(p21)

5.9 Plot 22: Job Change Motivations

cat("Creating Plot 22: Job Change Motivations\n")
## Creating Plot 22: Job Change Motivations
# Survey column -> display label for every job-change motivation plotted.
motivation_labels <- c(
  "m12C[SQ001]" = "Looking for Change",
  "m12C[SQ002]" = "Greater Advancement",
  "m12C[SQ003]" = "Flexible Schedule",
  "m12C[SQ004]" = "Better Compensation",
  "m12C[SQ005]" = "Lost Prior Job",
  "m12C[SQ006]" = "Gain Experience",
  "m12C[SQ008]" = "Other Reasons"
)

# The columns to process are exactly the labelled ones, in the same order.
motivation_cols <- names(motivation_labels)

# For each motivation column that exists in the 2025 data, compute the
# weighted Yes/No split by gender and keep only the "Yes" rows; columns that
# are absent from df_2025 contribute nothing.
motivation_results <- map_dfr(
  intersect(motivation_cols, colnames(df_2025)),
  function(col) {
    result <- apply_boolean_weighting(df_2025, col)
    result$motivation <- motivation_labels[col]
    filter(result, response == "Yes")
  }
)

p22 <- motivation_results %>%
  ggplot(aes(
    x = reorder(motivation, weighted_percent),
    y = weighted_percent,
    fill = gender
  )) +
  labs(
    title = "2025 Job Change Motivations by Gender",
    subtitle = "Women's responses weighted by 2020 age distribution | % who selected each motivation",
    x = "Motivation for Job Change",
    y = "Percentage Who Selected This Motivation",
    fill = "Gender",
    caption = "Source: 2025 Survey Data | Women-only weighting applied"
  ) +
  geom_col(position = "dodge", alpha = 0.8, width = 0.7) +
  # Bars are flipped to horizontal below, so hjust pushes labels past the
  # bar ends.
  geom_text(
    aes(label = paste0(round(weighted_percent, 1), "%")),
    position = position_dodge(width = 0.7),
    hjust = -0.1,
    size = 3.5
  ) +
  scale_fill_manual(values = comp_colors) +
  scale_y_continuous(
    labels = function(x) paste0(x, "%"),
    expand = expansion(mult = c(0, 0.15))
  ) +
  coord_flip() +
  theme_compensation +
  theme(axis.text.y = element_text(size = 9))

print(p22)

6 Executive Summary

cat("\n=== EXECUTIVE SUMMARY ===\n")
## 
## === EXECUTIVE SUMMARY ===
# Pay gaps: women's shortfall relative to men, in percent, to one decimal.
# The four salary/total inputs are computed earlier in the report.
base_salary_gap <- round((1 - women_salary/men_salary) * 100, 1)
total_comp_gap <- round((1 - women_total/men_total) * 100, 1)

# Weighted mean ratings by gender. The helper returns its value in a column
# called mean_salary regardless of which survey item it is given.

# Self-assessed success (m15[SQ001])
success_levels <- apply_compensation_weighting(df_2025, "m15[SQ001]")
women_success <- pull(filter(success_levels, gender == "Women"), mean_salary)
men_success <- pull(filter(success_levels, gender == "Men"), mean_salary)

# Satisfaction with career success (m16[SQ001])
career_satisfaction <- apply_compensation_weighting(df_2025, "m16[SQ001]")
women_career_sat <- pull(filter(career_satisfaction, gender == "Women"), mean_salary)
men_career_sat <- pull(filter(career_satisfaction, gender == "Men"), mean_salary)

# Satisfaction with work-life balance (m16[SQ002])
worklife_balance <- apply_compensation_weighting(df_2025, "m16[SQ002]")
women_balance <- pull(filter(worklife_balance, gender == "Women"), mean_salary)
men_balance <- pull(filter(worklife_balance, gender == "Men"), mean_salary)

# One row per headline figure. Gaps carry a "%" suffix, so the whole Value
# column ends up as text.
summary_data <- data.frame(
  Metric = c(
    "Base Salary Gap (%)",
    "Total Compensation Gap (%)",
    "Success Level (Women)",
    "Success Level (Men)",
    "Career Satisfaction (Women)",
    "Career Satisfaction (Men)",
    "Work-Life Balance (Women)",
    "Work-Life Balance (Men)"
  ),
  Value = c(
    paste0(base_salary_gap, "%"),
    paste0(total_comp_gap, "%"),
    round(
      c(
        women_success, men_success,
        women_career_sat, men_career_sat,
        women_balance, men_balance
      ),
      1
    )
  )
)

# Render the summary as a styled table with a bold first column and a
# white-on-blue header row.
kable(
  summary_data,
  caption = "Key Findings Summary",
  col.names = c("Key Metric", "Value"),
  align = c("l", "c")
) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE,
    position = "center"
  ) %>%
  column_spec(1, bold = TRUE) %>%
  row_spec(0, bold = TRUE, color = "white", background = "#3498db")
Key Findings Summary
Key Metric Value
Base Salary Gap (%) 19.9%
Total Compensation Gap (%) 42.8%
Success Level (Women) 7.5
Success Level (Men) 7.6
Career Satisfaction (Women) 7.4
Career Satisfaction (Men) 7.4
Work-Life Balance (Women) 7.2
Work-Life Balance (Men) 7.4
# Print the headline figures computed above as a bulleted list. Each cat()
# call is followed by its echoed output (the lines starting with "##").
# cat() joins its arguments with a single space, which is why the output
# reads "19.9 %" and "7.5 /10" rather than "19.9%" and "7.5/10".
cat("\n=== KEY INSIGHTS ===\n")
## 
## === KEY INSIGHTS ===
# Pay gaps (percent, already rounded when they were computed).
cat("• Base salary gap:", base_salary_gap, "%\n")
## • Base salary gap: 19.9 %
cat("• Total compensation gap:", total_comp_gap, "%\n") 
## • Total compensation gap: 42.8 %
# Women's weighted mean ratings, rounded to one decimal for display.
cat("• Women's self-assessed success level:", round(women_success, 1), "/10\n")
## • Women's self-assessed success level: 7.5 /10
cat("• Women's career satisfaction:", round(women_career_sat, 1), "/10\n")
## • Women's career satisfaction: 7.4 /10
cat("• Women's work-life balance satisfaction:", round(women_balance, 1), "/10\n")
## • Women's work-life balance satisfaction: 7.2 /10